"""
演示RDD成员方法distinct的使用
功能:对RDD数据进行去重,返回新的RDD
"""

from pyspark import SparkContext,SparkConf
import os
# Point PySpark at the Python interpreter it should use for its worker processes.
# NOTE(review): this is a machine-specific absolute path; change it (or export
# PYSPARK_PYTHON in the environment instead) before running on another machine.
os.environ["PYSPARK_PYTHON"] = "C:/Users/86131/AppData/Local/Programs/Python/Python39/python.exe"

# Run Spark in local mode ("local[*]") under the application name "test_spark".
conf = SparkConf().setMaster("local[*]").setAppName("test_spark")

# Use the SparkContext as a context manager so that it is stopped on exit even
# when one of the RDD operations below raises; a trailing sc.stop() call would
# be skipped in that case and leave the context running.
with SparkContext(conf=conf) as sc:
    # Prepare an RDD that contains duplicate values (3 and 5 are repeated).
    rdd = sc.parallelize([1, 2, 3, 3, 3, 5, 4, 5, 6, 7, 8, 9, 10])

    # distinct() returns a new RDD with the duplicate elements removed.
    rdd2 = rdd.distinct()

    # collect() returns the RDD's elements to the driver as a list.
    print(rdd2.collect())
